Group 3:
Lorenz Koch (a24lorko)
Kim Wurster (a24kimwu)
!pip install lime
Collecting lime
Downloading lime-0.2.0.1.tar.gz (275 kB)
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 275.7/275.7 kB 5.2 MB/s eta 0:00:00
Preparing metadata (setup.py) ... done
Requirement already satisfied: matplotlib in /usr/local/lib/python3.12/dist-packages (from lime) (3.10.0)
Requirement already satisfied: numpy in /usr/local/lib/python3.12/dist-packages (from lime) (2.0.2)
Requirement already satisfied: scipy in /usr/local/lib/python3.12/dist-packages (from lime) (1.16.2)
Requirement already satisfied: tqdm in /usr/local/lib/python3.12/dist-packages (from lime) (4.67.1)
Requirement already satisfied: scikit-learn>=0.18 in /usr/local/lib/python3.12/dist-packages (from lime) (1.6.1)
Requirement already satisfied: scikit-image>=0.12 in /usr/local/lib/python3.12/dist-packages (from lime) (0.25.2)
Requirement already satisfied: networkx>=3.0 in /usr/local/lib/python3.12/dist-packages (from scikit-image>=0.12->lime) (3.5)
Requirement already satisfied: pillow>=10.1 in /usr/local/lib/python3.12/dist-packages (from scikit-image>=0.12->lime) (11.3.0)
Requirement already satisfied: imageio!=2.35.0,>=2.33 in /usr/local/lib/python3.12/dist-packages (from scikit-image>=0.12->lime) (2.37.0)
Requirement already satisfied: tifffile>=2022.8.12 in /usr/local/lib/python3.12/dist-packages (from scikit-image>=0.12->lime) (2025.10.4)
Requirement already satisfied: packaging>=21 in /usr/local/lib/python3.12/dist-packages (from scikit-image>=0.12->lime) (25.0)
Requirement already satisfied: lazy-loader>=0.4 in /usr/local/lib/python3.12/dist-packages (from scikit-image>=0.12->lime) (0.4)
Requirement already satisfied: joblib>=1.2.0 in /usr/local/lib/python3.12/dist-packages (from scikit-learn>=0.18->lime) (1.5.2)
Requirement already satisfied: threadpoolctl>=3.1.0 in /usr/local/lib/python3.12/dist-packages (from scikit-learn>=0.18->lime) (3.6.0)
Requirement already satisfied: contourpy>=1.0.1 in /usr/local/lib/python3.12/dist-packages (from matplotlib->lime) (1.3.3)
Requirement already satisfied: cycler>=0.10 in /usr/local/lib/python3.12/dist-packages (from matplotlib->lime) (0.12.1)
Requirement already satisfied: fonttools>=4.22.0 in /usr/local/lib/python3.12/dist-packages (from matplotlib->lime) (4.60.1)
Requirement already satisfied: kiwisolver>=1.3.1 in /usr/local/lib/python3.12/dist-packages (from matplotlib->lime) (1.4.9)
Requirement already satisfied: pyparsing>=2.3.1 in /usr/local/lib/python3.12/dist-packages (from matplotlib->lime) (3.2.5)
Requirement already satisfied: python-dateutil>=2.7 in /usr/local/lib/python3.12/dist-packages (from matplotlib->lime) (2.9.0.post0)
Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.12/dist-packages (from python-dateutil>=2.7->matplotlib->lime) (1.17.0)
Building wheels for collected packages: lime
Building wheel for lime (setup.py) ... done
Created wheel for lime: filename=lime-0.2.0.1-py3-none-any.whl size=283834 sha256=dff6c07bd9f970a44aeaf1fe723ad765be616752b624e6b0163e74e857f610fb
Stored in directory: /root/.cache/pip/wheels/e7/5d/0e/4b4fff9a47468fed5633211fb3b76d1db43fe806a17fb7486a
Successfully built lime
Installing collected packages: lime
Successfully installed lime-0.2.0.1
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sb
import pickle
import shap
import time
import warnings
import lime
from lime.lime_tabular import LimeTabularExplainer
from sklearn.preprocessing import OneHotEncoder, StandardScaler, LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn import preprocessing
import xgboost as xgb
from xgboost import XGBRegressor
SEED = 0
np.random.seed(SEED)  # seed NumPy's global RNG for reproducible splits and plots
from google.colab import drive
drive.mount('/content/drive')  # Colab-only: exposes Google Drive under /content/drive
Mounted at /content/drive
def huber_loss(y_true, y_pred, delta=1.0):
    """Return the mean Huber loss between targets and predictions.

    The loss is quadratic for residuals with |error| <= delta and linear
    beyond that, making the metric robust to outliers.

    delta: transition point between the quadratic and linear regimes.
    """
    residual = y_true - y_pred
    abs_residual = np.abs(residual)
    quadratic = 0.5 * residual ** 2
    linear = delta * (abs_residual - 0.5 * delta)
    return np.where(abs_residual <= delta, quadratic, linear).mean()
# Load the daily bike-sharing data; 'dteday' (column position 1) is parsed
# as datetime and used as the index.
df = pd.read_csv('/content/drive/MyDrive/day-bikesharing.csv', parse_dates=['dteday'],index_col=1) # Load the data
# Pre-trained XGBoost regressors for the total (cnt), registered, and casual
# rental counts, restored from JSON checkpoints saved by an earlier run.
totmodel = XGBRegressor()
regmodel = XGBRegressor()
casmodel = XGBRegressor()
totmodel.load_model("/content/drive/MyDrive/totalmodel_common.json")
regmodel.load_model("/content/drive/MyDrive/regmodel_common.json")
casmodel.load_model("/content/drive/MyDrive/casmodel_common.json")
/usr/local/lib/python3.12/dist-packages/xgboost/sklearn.py:1037: UserWarning: [12:44:35] WARNING: /workspace/src/learner.cc:871: Found JSON model saved before XGBoost 1.6, please save the model using current version again. The support for old JSON model will be discontinued in XGBoost 2.3. self.get_booster().load_model(fname) /usr/local/lib/python3.12/dist-packages/xgboost/sklearn.py:1037: UserWarning: [12:44:36] WARNING: /workspace/src/learner.cc:871: Found JSON model saved before XGBoost 1.6, please save the model using current version again. The support for old JSON model will be discontinued in XGBoost 2.3. self.get_booster().load_model(fname) /usr/local/lib/python3.12/dist-packages/xgboost/sklearn.py:1037: UserWarning: [12:44:38] WARNING: /workspace/src/learner.cc:871: Found JSON model saved before XGBoost 1.6, please save the model using current version again. The support for old JSON model will be discontinued in XGBoost 2.3. self.get_booster().load_model(fname)
df.shape
(731, 15)
def add_features(df):
    """Add calendar columns derived from the DatetimeIndex, in place.

    Creates 'year' (the raw 'yr' column already encodes this as 0/1),
    'month', 'day', and 'dayofweek' (pandas convention: Monday=0).
    """
    idx = df.index
    for name, values in (("year", idx.year),
                         ("month", idx.month),
                         ("day", idx.day),
                         ("dayofweek", idx.dayofweek)):
        df[name] = values
January 1, 2011 is a Saturday.
Note: pandas 'dayofweek' encodes Monday as 0, so a value of 5 means Saturday.
add_features(df)
df.head(3)
| instant | season | yr | mnth | holiday | weekday | workingday | weathersit | temp | atemp | hum | windspeed | casual | registered | cnt | year | month | day | dayofweek | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| dteday | |||||||||||||||||||
| 2011-01-01 | 1 | 1 | 0 | 1 | 0 | 6 | 0 | 2 | 0.344167 | 0.363625 | 0.805833 | 0.160446 | 331 | 654 | 985 | 2011 | 1 | 1 | 5 |
| 2011-01-02 | 2 | 1 | 0 | 1 | 0 | 0 | 0 | 2 | 0.363478 | 0.353739 | 0.696087 | 0.248539 | 131 | 670 | 801 | 2011 | 1 | 2 | 6 |
| 2011-01-03 | 3 | 1 | 0 | 1 | 0 | 1 | 1 | 1 | 0.196364 | 0.189405 | 0.437273 | 0.248309 | 120 | 1229 | 1349 | 2011 | 1 | 3 | 0 |
#This is the normalized version.
# temp: The values are divided to 41 (max)
# atemp: The values are divided to 50 (max)
# hum: The values are divided to 100 (max)
# windspeed: The values are divided to 67 (max)
df.describe().T
| count | mean | std | min | 25% | 50% | 75% | max | |
|---|---|---|---|---|---|---|---|---|
| instant | 731.0 | 366.000000 | 211.165812 | 1.000000 | 183.500000 | 366.000000 | 548.500000 | 731.000000 |
| season | 731.0 | 2.496580 | 1.110807 | 1.000000 | 2.000000 | 3.000000 | 3.000000 | 4.000000 |
| yr | 731.0 | 0.500684 | 0.500342 | 0.000000 | 0.000000 | 1.000000 | 1.000000 | 1.000000 |
| mnth | 731.0 | 6.519836 | 3.451913 | 1.000000 | 4.000000 | 7.000000 | 10.000000 | 12.000000 |
| holiday | 731.0 | 0.028728 | 0.167155 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 1.000000 |
| weekday | 731.0 | 2.997264 | 2.004787 | 0.000000 | 1.000000 | 3.000000 | 5.000000 | 6.000000 |
| workingday | 731.0 | 0.683995 | 0.465233 | 0.000000 | 0.000000 | 1.000000 | 1.000000 | 1.000000 |
| weathersit | 731.0 | 1.395349 | 0.544894 | 1.000000 | 1.000000 | 1.000000 | 2.000000 | 3.000000 |
| temp | 731.0 | 0.495385 | 0.183051 | 0.059130 | 0.337083 | 0.498333 | 0.655417 | 0.861667 |
| atemp | 731.0 | 0.474354 | 0.162961 | 0.079070 | 0.337842 | 0.486733 | 0.608602 | 0.840896 |
| hum | 731.0 | 0.627894 | 0.142429 | 0.000000 | 0.520000 | 0.626667 | 0.730209 | 0.972500 |
| windspeed | 731.0 | 0.190486 | 0.077498 | 0.022392 | 0.134950 | 0.180975 | 0.233214 | 0.507463 |
| casual | 731.0 | 848.176471 | 686.622488 | 2.000000 | 315.500000 | 713.000000 | 1096.000000 | 3410.000000 |
| registered | 731.0 | 3656.172367 | 1560.256377 | 20.000000 | 2497.000000 | 3662.000000 | 4776.500000 | 6946.000000 |
| cnt | 731.0 | 4504.348837 | 1937.211452 | 22.000000 | 3152.000000 | 4548.000000 | 5956.000000 | 8714.000000 |
| year | 731.0 | 2011.500684 | 0.500342 | 2011.000000 | 2011.000000 | 2012.000000 | 2012.000000 | 2012.000000 |
| month | 731.0 | 6.519836 | 3.451913 | 1.000000 | 4.000000 | 7.000000 | 10.000000 | 12.000000 |
| day | 731.0 | 15.738714 | 8.809949 | 1.000000 | 8.000000 | 16.000000 | 23.000000 | 31.000000 |
| dayofweek | 731.0 | 3.002736 | 2.004787 | 0.000000 | 1.000000 | 3.000000 | 5.000000 | 6.000000 |
Perform one hot encoding on categorical variables. Machine learning algorithms assume (and require) the data to be numeric, so the categorical data must be pre-processed as a numerical data. Categorical variables turn into binary features that are 'one hot' encoded, so if a feature is represented by that column, it receives a 1, otherwise a 0.
# Define the categorical variables
cat_var = ['season', 'holiday', 'workingday', 'weathersit', 'year']
# OneHotEncoder
# categories='auto' infers the category levels from the data itself.
ohe = OneHotEncoder(categories = 'auto')
# Fit the categorical variables to the encoder
# (result is a sparse matrix with one binary column per category level)
encodeddf = ohe.fit_transform(df[cat_var])
# Create a DataFrame with the encoded value information
cat_df = pd.DataFrame(data = encodeddf.toarray(), columns = ohe.get_feature_names_out(cat_var))
cat_df
| season_1 | season_2 | season_3 | season_4 | holiday_0 | holiday_1 | workingday_0 | workingday_1 | weathersit_1 | weathersit_2 | weathersit_3 | year_2011 | year_2012 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 1.0 | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 | 1.0 | 0.0 | 0.0 | 1.0 | 0.0 | 1.0 | 0.0 |
| 1 | 1.0 | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 | 1.0 | 0.0 | 0.0 | 1.0 | 0.0 | 1.0 | 0.0 |
| 2 | 1.0 | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 1.0 | 1.0 | 0.0 | 0.0 | 1.0 | 0.0 |
| 3 | 1.0 | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 1.0 | 1.0 | 0.0 | 0.0 | 1.0 | 0.0 |
| 4 | 1.0 | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 1.0 | 1.0 | 0.0 | 0.0 | 1.0 | 0.0 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 726 | 1.0 | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 1.0 | 0.0 | 1.0 | 0.0 | 0.0 | 1.0 |
| 727 | 1.0 | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 1.0 | 0.0 | 1.0 | 0.0 | 0.0 | 1.0 |
| 728 | 1.0 | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 | 1.0 | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 1.0 |
| 729 | 1.0 | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 | 1.0 | 0.0 | 1.0 | 0.0 | 0.0 | 0.0 | 1.0 |
| 730 | 1.0 | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 1.0 | 0.0 | 1.0 | 0.0 | 0.0 | 1.0 |
731 rows × 13 columns
# rename the column names from 1,2,3,4 to spring, summer, fall, winter
# NOTE(review): season_1 is mapped to 'winter' here, not 'spring' as the
# comment's ordering suggests — confirm against the dataset's season coding.
cat_df.rename(columns={'season_1': 'winter', 'season_2': 'spring', 'season_3': 'summer', 'season_4': 'fall'}, inplace=True)
cat_df.rename(columns={'weathersit_1': 'clear', 'weathersit_2': 'cloudy', 'weathersit_3': 'lightsnow.rain'}, inplace=True)
cat_df
| winter | spring | summer | fall | holiday_0 | holiday_1 | workingday_0 | workingday_1 | clear | cloudy | lightsnow.rain | year_2011 | year_2012 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 1.0 | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 | 1.0 | 0.0 | 0.0 | 1.0 | 0.0 | 1.0 | 0.0 |
| 1 | 1.0 | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 | 1.0 | 0.0 | 0.0 | 1.0 | 0.0 | 1.0 | 0.0 |
| 2 | 1.0 | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 1.0 | 1.0 | 0.0 | 0.0 | 1.0 | 0.0 |
| 3 | 1.0 | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 1.0 | 1.0 | 0.0 | 0.0 | 1.0 | 0.0 |
| 4 | 1.0 | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 1.0 | 1.0 | 0.0 | 0.0 | 1.0 | 0.0 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 726 | 1.0 | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 1.0 | 0.0 | 1.0 | 0.0 | 0.0 | 1.0 |
| 727 | 1.0 | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 1.0 | 0.0 | 1.0 | 0.0 | 0.0 | 1.0 |
| 728 | 1.0 | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 | 1.0 | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 1.0 |
| 729 | 1.0 | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 | 1.0 | 0.0 | 1.0 | 0.0 | 0.0 | 0.0 | 1.0 |
| 730 | 1.0 | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 1.0 | 0.0 | 1.0 | 0.0 | 0.0 | 1.0 |
731 rows × 13 columns
Machine learning is sensitive to the relative scales of numerical variables. The range of all variables need to be normalized so that each feature contributes approximately proportionately to the calculation.
# Define the numerical variables
num_var = ['temp', 'atemp', 'hum', 'windspeed', 'month', 'day', 'dayofweek']
Create standardized, normalized
# StandardScaler object
scaler = StandardScaler()
# Fit the data to the scaler
# (z-scores each numerical column: subtract its mean, divide by its std)
numscaled = scaler.fit_transform(df[num_var])
Keep the original for later view
# for inverse transformation
inversed = scaler.inverse_transform(numscaled)
print(inversed)
[[ 0.344167 0.363625 0.805833 ... 1. 1. 5. ] [ 0.363478 0.353739 0.696087 ... 1. 2. 6. ] [ 0.196364 0.189405 0.437273 ... 1. 3. 0. ] ... [ 0.253333 0.2424 0.752917 ... 12. 29. 5. ] [ 0.255833 0.2317 0.483333 ... 12. 30. 6. ] [ 0.215833 0.223487 0.5775 ... 12. 31. 0. ]]
# Create DataFrame with original data
inversed_df = pd.DataFrame(data = inversed, columns = num_var)
inversed_df
# Calculate based on UCI info to retrieve the actual temperature and weather information
#temp : Normalized temperature in Celsius. The values are divided to 41 (max)
#atemp: Normalized feeling temperature in Celsius. The values are divided to 50 (max)
#hum: Normalized humidity. The values are divided to 100 (max)
#windspeed: Normalized wind speed. The values are divided to 67 (max)
# Undo the dataset's [0, 1] normalization by multiplying each column by its
# documented maximum (see UCI notes above), recovering physical units.
inversed_origin = pd.concat([inversed_df.iloc[:,0] * 41, #temperature
                             inversed_df.iloc[:,1] * 50, #feeling temperature
                             inversed_df.iloc[:,2] * 100, #humidity
                             inversed_df.iloc[:,3] * 67], #wind speed
                            axis=1)
inversed_origin
| temp | atemp | hum | windspeed | |
|---|---|---|---|---|
| 0 | 14.110847 | 18.18125 | 80.5833 | 10.749882 |
| 1 | 14.902598 | 17.68695 | 69.6087 | 16.652113 |
| 2 | 8.050924 | 9.47025 | 43.7273 | 16.636703 |
| 3 | 8.200000 | 10.60610 | 59.0435 | 10.739832 |
| 4 | 9.305237 | 11.46350 | 43.6957 | 12.522300 |
| ... | ... | ... | ... | ... |
| 726 | 10.420847 | 11.33210 | 65.2917 | 23.458911 |
| 727 | 10.386653 | 12.75230 | 59.0000 | 10.416557 |
| 728 | 10.386653 | 12.12000 | 75.2917 | 8.333661 |
| 729 | 10.489153 | 11.58500 | 48.3333 | 23.500518 |
| 730 | 8.849153 | 11.17435 | 57.7500 | 10.374682 |
731 rows × 4 columns
# timeseries of temp, atemp, humidity, windspeed
ax = inversed_origin.set_index(inversed_origin.index).plot(figsize=(20,5))
ax.set_xlabel('daily time (# rows)')
Text(0.5, 0, 'daily time (# rows)')
# Human-readable feature matrix: one-hot categoricals plus de-normalized
# numericals; column order mirrors the model input Xdf built below.
X_original = pd.concat([cat_df, inversed_origin, inversed_df[['month', 'day', 'dayofweek']]], axis=1)
X_original # Use this for SHAP plot view
| winter | spring | summer | fall | holiday_0 | holiday_1 | workingday_0 | workingday_1 | clear | cloudy | lightsnow.rain | year_2011 | year_2012 | temp | atemp | hum | windspeed | month | day | dayofweek | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 1.0 | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 | 1.0 | 0.0 | 0.0 | 1.0 | 0.0 | 1.0 | 0.0 | 14.110847 | 18.18125 | 80.5833 | 10.749882 | 1.0 | 1.0 | 5.0 |
| 1 | 1.0 | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 | 1.0 | 0.0 | 0.0 | 1.0 | 0.0 | 1.0 | 0.0 | 14.902598 | 17.68695 | 69.6087 | 16.652113 | 1.0 | 2.0 | 6.0 |
| 2 | 1.0 | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 1.0 | 1.0 | 0.0 | 0.0 | 1.0 | 0.0 | 8.050924 | 9.47025 | 43.7273 | 16.636703 | 1.0 | 3.0 | 0.0 |
| 3 | 1.0 | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 1.0 | 1.0 | 0.0 | 0.0 | 1.0 | 0.0 | 8.200000 | 10.60610 | 59.0435 | 10.739832 | 1.0 | 4.0 | 1.0 |
| 4 | 1.0 | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 1.0 | 1.0 | 0.0 | 0.0 | 1.0 | 0.0 | 9.305237 | 11.46350 | 43.6957 | 12.522300 | 1.0 | 5.0 | 2.0 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 726 | 1.0 | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 1.0 | 0.0 | 1.0 | 0.0 | 0.0 | 1.0 | 10.420847 | 11.33210 | 65.2917 | 23.458911 | 12.0 | 27.0 | 3.0 |
| 727 | 1.0 | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 1.0 | 0.0 | 1.0 | 0.0 | 0.0 | 1.0 | 10.386653 | 12.75230 | 59.0000 | 10.416557 | 12.0 | 28.0 | 4.0 |
| 728 | 1.0 | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 | 1.0 | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 1.0 | 10.386653 | 12.12000 | 75.2917 | 8.333661 | 12.0 | 29.0 | 5.0 |
| 729 | 1.0 | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 | 1.0 | 0.0 | 1.0 | 0.0 | 0.0 | 0.0 | 1.0 | 10.489153 | 11.58500 | 48.3333 | 23.500518 | 12.0 | 30.0 | 6.0 |
| 730 | 1.0 | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 1.0 | 0.0 | 1.0 | 0.0 | 0.0 | 1.0 | 8.849153 | 11.17435 | 57.7500 | 10.374682 | 12.0 | 31.0 | 0.0 |
731 rows × 20 columns
# Create DataFrame with numerically scaled data
num_df = pd.DataFrame(data = numscaled, columns = num_var)
num_df
| temp | atemp | hum | windspeed | month | day | dayofweek | |
|---|---|---|---|---|---|---|---|
| 0 | -0.826662 | -0.679946 | 1.250171 | -0.387892 | -1.600161 | -1.674108 | 0.996930 |
| 1 | -0.721095 | -0.740652 | 0.479113 | 0.749602 | -1.600161 | -1.560522 | 1.496077 |
| 2 | -1.634657 | -1.749767 | -1.339274 | 0.746632 | -1.600161 | -1.446936 | -1.498809 |
| 3 | -1.614780 | -1.610270 | -0.263182 | -0.389829 | -1.600161 | -1.333351 | -0.999661 |
| 4 | -1.467414 | -1.504971 | -1.341494 | -0.046307 | -1.600161 | -1.219765 | -0.500513 |
| ... | ... | ... | ... | ... | ... | ... | ... |
| 726 | -1.318665 | -1.521108 | 0.175807 | 2.061426 | 1.588660 | 1.279122 | -0.001366 |
| 727 | -1.323224 | -1.346690 | -0.266238 | -0.452131 | 1.588660 | 1.392707 | 0.497782 |
| 728 | -1.323224 | -1.424344 | 0.878392 | -0.853552 | 1.588660 | 1.506293 | 0.996930 |
| 729 | -1.309558 | -1.490049 | -1.015664 | 2.069444 | 1.588660 | 1.619879 | 1.496077 |
| 730 | -1.528225 | -1.540482 | -0.354061 | -0.460201 | 1.588660 | 1.733465 | -1.498809 |
731 rows × 7 columns
Define X (input): Concatenate one hot encoded categorical + normalized numerical data
# Model input: one-hot encoded categoricals + standardized numericals.
Xdf = pd.concat([cat_df, num_df], axis = 1)
print(Xdf.shape)
Xdf.head(5)
(731, 20)
| winter | spring | summer | fall | holiday_0 | holiday_1 | workingday_0 | workingday_1 | clear | cloudy | lightsnow.rain | year_2011 | year_2012 | temp | atemp | hum | windspeed | month | day | dayofweek | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 1.0 | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 | 1.0 | 0.0 | 0.0 | 1.0 | 0.0 | 1.0 | 0.0 | -0.826662 | -0.679946 | 1.250171 | -0.387892 | -1.600161 | -1.674108 | 0.996930 |
| 1 | 1.0 | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 | 1.0 | 0.0 | 0.0 | 1.0 | 0.0 | 1.0 | 0.0 | -0.721095 | -0.740652 | 0.479113 | 0.749602 | -1.600161 | -1.560522 | 1.496077 |
| 2 | 1.0 | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 1.0 | 1.0 | 0.0 | 0.0 | 1.0 | 0.0 | -1.634657 | -1.749767 | -1.339274 | 0.746632 | -1.600161 | -1.446936 | -1.498809 |
| 3 | 1.0 | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 1.0 | 1.0 | 0.0 | 0.0 | 1.0 | 0.0 | -1.614780 | -1.610270 | -0.263182 | -0.389829 | -1.600161 | -1.333351 | -0.999661 |
| 4 | 1.0 | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 1.0 | 1.0 | 0.0 | 0.0 | 1.0 | 0.0 | -1.467414 | -1.504971 | -1.341494 | -0.046307 | -1.600161 | -1.219765 | -0.500513 |
# Save column names for SHAP usage
NAMES = Xdf.columns
NAMES
Index(['winter', 'spring', 'summer', 'fall', 'holiday_0', 'holiday_1',
'workingday_0', 'workingday_1', 'clear', 'cloudy', 'lightsnow.rain',
'year_2011', 'year_2012', 'temp', 'atemp', 'hum', 'windspeed', 'month',
'day', 'dayofweek'],
dtype='object')
plt.subplots(figsize=(8, 6))
sb.heatmap(Xdf.corr(), cmap='coolwarm')
<Axes: >
# Check if there is any non available
Xdf.isna().any().sum()
np.int64(0)
# Here, indicate which target to predict (cnt or casual or registered)
y_tot = df['cnt']
# Normalize target data: Calculate mean and standard deviation
# NOTE(review): y_mu / y_sd are overwritten by the registered and casual
# cells below — keep per-target copies if you later need to invert.
y_mu = y_tot.mean(0)
y_sd = y_tot.std(0)
print('y total mean:', y_mu, ' y total std:' , y_sd)
y_tot_norm = (y_tot - y_mu) / y_sd
y_tot_norm = y_tot_norm.values.ravel() #dataframe to 1D array
#y_norm
y total mean: 4504.3488372093025 y total std: 1937.2114516187678
# Here, indicate which target to predict (cnt or casual or registered)
# For the different models for each customer in the assignment, change the target
y_reg = df['registered']
# Normalize target data: Calculate mean and standard deviation
y_mu = y_reg.mean(0)
y_sd = y_reg.std(0)
print('y registered mean:', y_mu, ' y registered std:' , y_sd)
y_reg_norm = (y_reg - y_mu) / y_sd
y_reg_norm = y_reg_norm.values.ravel() #dataframe to 1D array
y registered mean: 3656.172366621067 y registered std: 1560.2563770194527
# Here, indicate which target to predict (cnt or casual or registered)
# For the different models for each customer in the assignment, change the target
y_cas = df['casual']
# Normalize target data: Calculate mean and standard deviation
y_mu = y_cas.mean(0)
y_sd = y_cas.std(0)
print('y casual mean:', y_mu, ' y casual std:' , y_sd)
y_cas_norm = (y_cas - y_mu) / y_sd
y_cas_norm = y_cas_norm.values.ravel() #dataframe to 1D array
y casual mean: 848.1764705882352 y casual std: 686.6224882846549
len(y_reg)
731
# Split the data into training, validation, and test data:
# All three splits use the same random_state=SEED, so the selected test
# rows align across the three targets.
X_train_tot, X_test_tot, y_train_tot, y_test_tot = train_test_split(Xdf, y_tot_norm, test_size = 0.2, random_state=SEED)
# Split the data into training, validation, and test data:
X_train_reg, X_test_reg, y_train_reg, y_test_reg = train_test_split(Xdf, y_reg_norm, test_size = 0.2, random_state=SEED)
# Split the data into training, validation, and test data:
X_train_cas, X_test_cas, y_train_cas, y_test_cas = train_test_split(Xdf, y_cas_norm, test_size = 0.2, random_state=SEED)
# Check parameters
print(totmodel.get_params())
{'objective': 'reg:pseudohubererror', 'base_score': '5E-1', 'booster': 'gbtree', 'callbacks': None, 'colsample_bylevel': None, 'colsample_bynode': None, 'colsample_bytree': None, 'device': None, 'early_stopping_rounds': None, 'enable_categorical': False, 'eval_metric': None, 'feature_types': ['float', 'float', 'float', 'float', 'float', 'float', 'float', 'float', 'float', 'float', 'float', 'float', 'float', 'float', 'float', 'float', 'float', 'float', 'float', 'float'], 'feature_weights': None, 'gamma': None, 'grow_policy': None, 'importance_type': None, 'interaction_constraints': None, 'learning_rate': None, 'max_bin': None, 'max_cat_threshold': None, 'max_cat_to_onehot': None, 'max_delta_step': None, 'max_depth': None, 'max_leaves': None, 'min_child_weight': None, 'missing': nan, 'monotone_constraints': None, 'multi_strategy': None, 'n_estimators': None, 'n_jobs': None, 'num_parallel_tree': None, 'random_state': None, 'reg_alpha': None, 'reg_lambda': None, 'sampling_method': None, 'scale_pos_weight': None, 'subsample': None, 'tree_method': None, 'validate_parameters': None, 'verbosity': None}
# Check parameters
print(regmodel.get_params())
{'objective': 'reg:squarederror', 'base_score': '5E-1', 'booster': 'gbtree', 'callbacks': None, 'colsample_bylevel': None, 'colsample_bynode': None, 'colsample_bytree': None, 'device': None, 'early_stopping_rounds': None, 'enable_categorical': False, 'eval_metric': None, 'feature_types': ['float', 'float', 'float', 'float', 'float', 'float', 'float', 'float', 'float', 'float', 'float', 'float', 'float', 'float', 'float', 'float', 'float', 'float', 'float', 'float'], 'feature_weights': None, 'gamma': None, 'grow_policy': None, 'importance_type': None, 'interaction_constraints': None, 'learning_rate': None, 'max_bin': None, 'max_cat_threshold': None, 'max_cat_to_onehot': None, 'max_delta_step': None, 'max_depth': None, 'max_leaves': None, 'min_child_weight': None, 'missing': nan, 'monotone_constraints': None, 'multi_strategy': None, 'n_estimators': None, 'n_jobs': None, 'num_parallel_tree': None, 'random_state': None, 'reg_alpha': None, 'reg_lambda': None, 'sampling_method': None, 'scale_pos_weight': None, 'subsample': None, 'tree_method': None, 'validate_parameters': None, 'verbosity': None}
# Check parameters
print(casmodel.get_params())
{'objective': 'reg:pseudohubererror', 'base_score': '5E-1', 'booster': 'gbtree', 'callbacks': None, 'colsample_bylevel': None, 'colsample_bynode': None, 'colsample_bytree': None, 'device': None, 'early_stopping_rounds': None, 'enable_categorical': False, 'eval_metric': None, 'feature_types': ['float', 'float', 'float', 'float', 'float', 'float', 'float', 'float', 'float', 'float', 'float', 'float', 'float', 'float', 'float', 'float', 'float', 'float', 'float', 'float'], 'feature_weights': None, 'gamma': None, 'grow_policy': None, 'importance_type': None, 'interaction_constraints': None, 'learning_rate': None, 'max_bin': None, 'max_cat_threshold': None, 'max_cat_to_onehot': None, 'max_delta_step': None, 'max_depth': None, 'max_leaves': None, 'min_child_weight': None, 'missing': nan, 'monotone_constraints': None, 'multi_strategy': None, 'n_estimators': None, 'n_jobs': None, 'num_parallel_tree': None, 'random_state': None, 'reg_alpha': None, 'reg_lambda': None, 'sampling_method': None, 'scale_pos_weight': None, 'subsample': None, 'tree_method': None, 'validate_parameters': None, 'verbosity': None}
# Evaluate the total-count model on its held-out test split
# (targets are normalized, so MAE/Huber are in standard-deviation units).
y_pred = totmodel.predict(X_test_tot)
r2 = r2_score(y_test_tot, y_pred)
#MAE
test_mae = mean_absolute_error(y_test_tot, y_pred)
# Huber loss
huber_loss_value = huber_loss(y_test_tot, y_pred, delta=1.0)
print(f"Total count(cnt) model, R2:{r2:.3f}, MAE:{test_mae:.3f}, Huber loss: {huber_loss_value:.3f}")
Total count(cnt) model, R2:0.964, MAE:0.144, Huber loss: 0.020
# Evaluate the registered-users model on its held-out test split.
y_pred = regmodel.predict(X_test_reg)
r2 = r2_score(y_test_reg, y_pred)
#MAE
test_mae = mean_absolute_error(y_test_reg, y_pred)
# Huber loss
huber_loss_value = huber_loss(y_test_reg, y_pred, delta=1.0)
print(f"Registered model, R2:{r2:.3f}, MAE:{test_mae:.3f}, Huber loss: {huber_loss_value:.3f}")
Registered model, R2:0.971, MAE:0.138, Huber loss: 0.016
# Evaluate the casual-users model on its held-out test split.
y_pred = casmodel.predict(X_test_cas)
r2 = r2_score(y_test_cas, y_pred)
#MAE
test_mae = mean_absolute_error(y_test_cas, y_pred)
# Huber loss
huber_loss_value = huber_loss(y_test_cas, y_pred, delta=1.0)
print(f"Casual model, R2:{r2:.3f}, MAE:{test_mae:.3f}, Huber loss: {huber_loss_value:.3f}")
Casual model, R2:0.922, MAE:0.181, Huber loss: 0.042
Now we will apply SHAP (SHapley Additive exPlanations) to understand and compare how different features influence predictions across the three models (total, registered, and casual users).
# Initialize JavaScript for SHAP visualizations
shap.initjs()
# Create SHAP explainers for each model
# TreeExplainer computes SHAP values for tree ensembles such as XGBoost.
explainer_tot = shap.TreeExplainer(totmodel)
explainer_reg = shap.TreeExplainer(regmodel)
explainer_cas = shap.TreeExplainer(casmodel)
# Calculate SHAP values for each model using the normalized data
# (computed on the full dataset Xdf, not only the test split)
shap_values_tot = explainer_tot.shap_values(Xdf)
shap_values_reg = explainer_reg.shap_values(Xdf)
shap_values_cas = explainer_cas.shap_values(Xdf)
The summary plots below show the feature importance and impact direction for each model. Features are ordered by importance (top to bottom), with each point representing a sample. The color indicates the feature value (red=high, blue=low) and the x-axis shows the SHAP value (impact on prediction).
# Create summary plots for all three models
plt.figure(figsize=(15, 20))
# Total bike count model
plt.subplot(3, 1, 1)
# X_original supplies human-readable feature values for the point colors;
# show=False lets the plots share one figure, rendered once at the end.
shap.summary_plot(shap_values_tot, X_original, feature_names=NAMES, show=False)
plt.title('SHAP Summary Plot - Total Bike Count Model', fontsize=16, fontweight='bold')
# Registered users model
plt.subplot(3, 1, 2)
shap.summary_plot(shap_values_reg, X_original, feature_names=NAMES, show=False)
plt.title('SHAP Summary Plot - Registered Users Model', fontsize=16, fontweight='bold')
# Casual users model
plt.subplot(3, 1, 3)
shap.summary_plot(shap_values_cas, X_original, feature_names=NAMES, show=False)
plt.title('SHAP Summary Plot - Casual Users Model', fontsize=16, fontweight='bold')
plt.tight_layout()
plt.show()
/tmp/ipython-input-4053996936.py:6: FutureWarning: The NumPy global RNG was seeded by calling `np.random.seed`. In a future version this function will no longer use the global RNG. Pass `rng` explicitly to opt-in to the new behaviour and silence this warning. shap.summary_plot(shap_values_tot, X_original, feature_names=NAMES, show=False) /tmp/ipython-input-4053996936.py:11: FutureWarning: The NumPy global RNG was seeded by calling `np.random.seed`. In a future version this function will no longer use the global RNG. Pass `rng` explicitly to opt-in to the new behaviour and silence this warning. shap.summary_plot(shap_values_reg, X_original, feature_names=NAMES, show=False) /tmp/ipython-input-4053996936.py:16: FutureWarning: The NumPy global RNG was seeded by calling `np.random.seed`. In a future version this function will no longer use the global RNG. Pass `rng` explicitly to opt-in to the new behaviour and silence this warning. shap.summary_plot(shap_values_cas, X_original, feature_names=NAMES, show=False)
1. Total Bike Count Model (Top plot):
year_2011 (most important), atemp (feeling temperature), temp (temperature), hum (humidity), winter (season), month, windspeed, dayofweek, year_2012, fall (season), clear (weather), day, lightsnow.rain (weather)

2. Registered Users Model (Middle plot):
year_2011 (dominates feature importance), temp (temperature), year_2012, atemp (feeling temperature), hum (humidity), workingday_0 (non-working days), month, winter (season), dayofweek, windspeed, workingday_1 (working days), day, clear (weather)

3. Casual Users Model (Bottom plot):
workingday_0 (non-working days, most important), atemp (feeling temperature), temp (temperature), year_2011, workingday_1 (working days), hum (humidity), dayofweek, windspeed, year_2012, clear (weather), day, spring (season), summer (season)

shap_interaction_values_tot = explainer_tot.shap_interaction_values(Xdf)
# Pairwise SHAP interaction values (n_samples x n_features x n_features),
# computed on the full dataset like the per-feature SHAP values above.
shap_interaction_values_reg = explainer_reg.shap_interaction_values(Xdf)
shap_interaction_values_cas = explainer_cas.shap_interaction_values(Xdf)
# Recompute per-feature SHAP values (same call as in the earlier cell).
shap_values_tot = explainer_tot.shap_values(Xdf)
shap_values_reg = explainer_reg.shap_values(Xdf)
shap_values_cas = explainer_cas.shap_values(Xdf)
# Compute means
# Dataset-wide averages, used as reference points when judging extreme days.
mean_casual = df["casual"].mean()
mean_reg = df["registered"].mean()
print(f"Average casual rentals: {mean_casual:.0f}")
print(f"Average registered rentals: {mean_reg:.0f}")
# Helper function to get top/bottom rows
def extreme_indices(df, column, workingday_flag, n=6):
    """Return (highest, lowest) index labels of `column`.

    Rows are first restricted to those whose 'workingday' equals
    `workingday_flag`; each list holds up to n labels, most extreme first.
    """
    values = df.loc[df["workingday"] == workingday_flag, column]
    top = list(values.nlargest(n).index)
    bottom = list(values.nsmallest(n).index)
    return top, bottom
# --- Find extreme days ---
# Casual customers (0 = non-working, 1 = working)
cas_high_nonwork, cas_low_nonwork = extreme_indices(df, "casual", 0)
cas_high_work, cas_low_work = extreme_indices(df, "casual", 1)
# Registered customers
reg_high_nonwork, reg_low_nonwork = extreme_indices(df, "registered", 0)
reg_high_work, reg_low_work = extreme_indices(df, "registered", 1)
# --- Combine unique indices ---
# set() drops days that are extreme for more than one category at once.
selected_indices = list(set(
    cas_high_nonwork + cas_low_nonwork +
    cas_high_work + cas_low_work +
    reg_high_nonwork + reg_low_nonwork +
    reg_high_work + reg_low_work
))
# Positional (0-based) row number next to the date index, for cross-referencing.
df['row_number'] = range(len(df))
# Show concise table of selected extreme instances
cols = ["workingday", "casual", "registered", "row_number"]
extreme_days = df.loc[selected_indices, cols].sort_index()
#print("\nSelected extreme-day instances:\n")
#print(extreme_days)
def label_reason(row):
    """Return a human-readable tag explaining why this day was selected.

    Checks the extreme-day index lists computed above, in priority order
    (casual before registered, high before low), and returns the first
    matching label; "Other" if the row is in none of them.
    """
    buckets = (
        (cas_high_nonwork, "High casual - non-working"),
        (cas_low_nonwork, "Low casual - non-working"),
        (cas_high_work, "High casual - working"),
        (cas_low_work, "Low casual - working"),
        (reg_high_nonwork, "High registered - non-working"),
        (reg_low_nonwork, "Low registered - non-working"),
        (reg_high_work, "High registered - working"),
        (reg_low_work, "Low registered - working"),
    )
    for members, label in buckets:
        if row.name in members:
            return label
    return "Other"
# Attach a human-readable selection reason to each extreme day and print them.
extreme_days["reason"] = extreme_days.apply(label_reason, axis=1)
print("\nDetailed extreme cases:")
print(extreme_days)

# Cross-check against the example days used in the lecture.
list_lecture = [8, 63, 91, 106, 130, 145, 301]
lecture_mask = df['row_number'].isin(list_lecture)
lecture_indices = df.index[lecture_mask].tolist()
Lecture_examples = df.loc[lecture_indices, cols].sort_index()
print("\nLecture examples:")
print(Lecture_examples)
Average casual rentals: 848
Average registered rentals: 3656
Detailed extreme cases:
workingday casual registered row_number \
dteday
2011-01-01 0 331 654 0
2011-01-02 0 131 670 1
2011-01-08 0 68 891 7
2011-01-09 0 54 768 8
2011-01-12 1 25 1137 11
2011-01-18 1 9 674 17
2011-01-22 0 93 888 21
2011-01-26 1 34 472 25
2011-01-27 1 15 416 26
2011-03-06 0 114 491 64
2011-03-10 1 46 577 68
2011-07-04 0 3065 2978 184
2011-10-29 0 57 570 301
2011-12-07 1 50 655 340
2011-12-25 0 303 451 358
2012-01-21 0 67 1234 385
2012-02-12 0 73 1456 407
2012-03-17 0 3155 4681 441
2012-03-23 1 2469 5893 447
2012-04-06 1 1807 4653 461
2012-04-07 0 3252 3605 462
2012-05-18 1 1521 6118 503
2012-05-19 0 3410 4884 504
2012-05-27 0 3283 3308 512
2012-06-02 0 2795 5325 518
2012-06-15 1 1563 6102 531
2012-09-09 0 2570 5657 617
2012-09-12 1 1050 6820 620
2012-09-15 0 3160 5554 623
2012-09-21 1 1250 6917 629
2012-09-22 0 2512 5883 630
2012-09-23 0 2454 5453 631
2012-09-26 1 787 6946 634
2012-09-29 0 2589 5966 637
2012-10-03 1 728 6844 641
2012-10-05 1 1516 6640 643
2012-10-10 1 780 6911 648
2012-10-24 1 795 6898 662
2012-10-29 1 2 20 667
2012-11-23 1 1603 2307 692
2012-12-25 0 440 573 724
2012-12-26 1 9 432 725
reason
dteday
2011-01-01 Low registered - non-working
2011-01-02 Low registered - non-working
2011-01-08 Low casual - non-working
2011-01-09 Low casual - non-working
2011-01-12 Low casual - working
2011-01-18 Low casual - working
2011-01-22 Low casual - non-working
2011-01-26 Low casual - working
2011-01-27 Low casual - working
2011-03-06 Low registered - non-working
2011-03-10 Low registered - working
2011-07-04 High casual - non-working
2011-10-29 Low casual - non-working
2011-12-07 Low registered - working
2011-12-25 Low registered - non-working
2012-01-21 Low casual - non-working
2012-02-12 Low casual - non-working
2012-03-17 High casual - non-working
2012-03-23 High casual - working
2012-04-06 High casual - working
2012-04-07 High casual - non-working
2012-05-18 High casual - working
2012-05-19 High casual - non-working
2012-05-27 High casual - non-working
2012-06-02 High registered - non-working
2012-06-15 High casual - working
2012-09-09 High registered - non-working
2012-09-12 High registered - working
2012-09-15 High casual - non-working
2012-09-21 High registered - working
2012-09-22 High registered - non-working
2012-09-23 High registered - non-working
2012-09-26 High registered - working
2012-09-29 High registered - non-working
2012-10-03 High registered - working
2012-10-05 High casual - working
2012-10-10 High registered - working
2012-10-24 High registered - working
2012-10-29 Low casual - working
2012-11-23 High casual - working
2012-12-25 Low registered - non-working
2012-12-26 Low casual - working
Lecture examples:
workingday casual registered row_number
dteday
2011-01-09 0 54 768 8
2011-03-05 0 640 1437 63
2011-04-02 0 898 1354 91
2011-04-17 0 1558 2186 106
2011-05-11 1 550 3632 130
2011-05-26 1 758 3919 145
2011-10-29 0 57 570 301
Selected instances: 11 (low casual - working), 106 (high casual - non-working), 145 (high registered - working), 301 (low registered, and low casual, - non-working).
# The four instances analysed below: two working days (11, 145) and two
# non-working days (106, 301), chosen from the extreme cases above.
indices = [11, 106, 145, 301]
df_indices = df.index[df['row_number'].isin(indices)].tolist()
table = df.loc[df_indices, cols].sort_index()
print("\nFour Instances:", table, sep="\n")
Four Instances:
workingday casual registered row_number
dteday
2011-01-12 1 25 1137 11
2011-04-17 0 1558 2186 106
2011-05-26 1 758 3919 145
2011-10-29 0 57 570 301
# Declare all 13 columns of the design matrix as categorical for LIME and
# collect the category labels per column via a fitted LabelEncoder.
categorical_features = list(range(13))
categorical_names = {
    feature: LabelEncoder().fit(Xdf.iloc[:, feature]).classes_
    for feature in categorical_features
}
lime_explainer = LimeTabularExplainer(
    X_original.values,
    feature_names=NAMES,
    categorical_features=categorical_features,
    categorical_names=categorical_names,
    verbose=True,
    mode='regression',
)
# LIME plot for instance 11 (casual model)
# NOTE(review): the original comment said instance 106, but iloc[11] is used —
# the "Right:" value printed below matches instance 11's output further down.
exp_cas = lime_explainer.explain_instance(X_original.iloc[11].values, casmodel.predict, num_features=10)
exp_cas.show_in_notebook(show_all=False)
display(pd.DataFrame(exp_cas.as_list(), columns=['Feature', 'Contribution']))
Intercept -0.683274326202949 Prediction_local [-1.77928484] Right: -1.6499407
| Feature | Contribution | |
|---|---|---|
| 0 | year_2011=1.0 | -0.327938 |
| 1 | workingday_1=1.0 | -0.285837 |
| 2 | year_2012=0.0 | -0.183970 |
| 3 | workingday_0=0.0 | -0.178654 |
| 4 | 1.00 < dayofweek <= 3.00 | -0.165183 |
| 5 | lightsnow.rain=0.0 | 0.097180 |
| 6 | holiday_0=1.0 | -0.089730 |
| 7 | spring=0.0 | -0.076094 |
| 8 | clear=1.0 | 0.070518 |
| 9 | cloudy=0.0 | 0.043698 |
# LIME visualizations for all four selected instances (casual + registered).
for idx in indices:
    row_values = X_original.iloc[idx].values
    # Both explanations are computed before any printing so the verbose
    # intercept/prediction lines appear first, as in the original output.
    exp_cas = lime_explainer.explain_instance(row_values, casmodel.predict, num_features=10)
    exp_reg = lime_explainer.explain_instance(row_values, regmodel.predict, num_features=10)
    for tag, exp in (("casual", exp_cas), ("registered", exp_reg)):
        print(f"LIME explanation for instance {idx} ({tag} model):")
        exp.show_in_notebook(show_all=False)
        display(pd.DataFrame(exp.as_list(), columns=['Feature', 'Contribution']))
Intercept -0.6374120916901262 Prediction_local [-1.76382773] Right: -1.6499407 Intercept -1.2196527841695883 Prediction_local [-1.36958699] Right: -1.4041109 LIME explanation for instance 11 (casual model):
| Feature | Contribution | |
|---|---|---|
| 0 | year_2011=1.0 | -0.327624 |
| 1 | workingday_1=1.0 | -0.293591 |
| 2 | workingday_0=0.0 | -0.183743 |
| 3 | year_2012=0.0 | -0.180156 |
| 4 | 1.00 < dayofweek <= 3.00 | -0.166329 |
| 5 | holiday_0=1.0 | -0.085515 |
| 6 | lightsnow.rain=0.0 | 0.081296 |
| 7 | spring=0.0 | -0.076004 |
| 8 | clear=1.0 | 0.058459 |
| 9 | winter=1.0 | 0.046792 |
LIME explanation for instance 11 (registered model):
| Feature | Contribution | |
|---|---|---|
| 0 | year_2011=1.0 | -0.560957 |
| 1 | lightsnow.rain=0.0 | 0.451061 |
| 2 | year_2012=0.0 | -0.293003 |
| 3 | workingday_0=0.0 | 0.273318 |
| 4 | winter=1.0 | -0.151070 |
| 5 | clear=1.0 | 0.109852 |
| 6 | workingday_1=1.0 | 0.095148 |
| 7 | fall=0.0 | -0.045469 |
| 8 | 1.00 < dayofweek <= 3.00 | -0.040967 |
| 9 | holiday_1=0.0 | 0.012153 |
Intercept -1.2449365033348365 Prediction_local [-1.23817413] Right: -1.3963554 Intercept -0.9990724845036394 Prediction_local [-1.58675694] Right: -1.487825 LIME explanation for instance 106 (casual model):
| Feature | Contribution | |
|---|---|---|
| 0 | year_2011=1.0 | -0.329842 |
| 1 | workingday_1=0.0 | 0.292531 |
| 2 | workingday_0=1.0 | 0.190411 |
| 3 | year_2012=0.0 | -0.181004 |
| 4 | dayofweek > 5.00 | -0.152603 |
| 5 | lightsnow.rain=0.0 | 0.075857 |
| 6 | holiday_0=1.0 | -0.072850 |
| 7 | spring=1.0 | 0.072333 |
| 8 | clear=1.0 | 0.069537 |
| 9 | cloudy=0.0 | 0.042391 |
LIME explanation for instance 106 (registered model):
| Feature | Contribution | |
|---|---|---|
| 0 | year_2011=1.0 | -0.555772 |
| 1 | lightsnow.rain=0.0 | 0.453522 |
| 2 | year_2012=0.0 | -0.290765 |
| 3 | workingday_0=1.0 | -0.273213 |
| 4 | winter=0.0 | 0.151743 |
| 5 | clear=1.0 | 0.108659 |
| 6 | workingday_1=0.0 | -0.094718 |
| 7 | fall=0.0 | -0.046422 |
| 8 | dayofweek > 5.00 | -0.033903 |
| 9 | cloudy=0.0 | -0.006814 |
Intercept -0.7102611967004622 Prediction_local [-1.71307003] Right: -1.7037699 Intercept -1.3715691869628366 Prediction_local [-1.22247824] Right: -1.2519995 LIME explanation for instance 145 (casual model):
| Feature | Contribution | |
|---|---|---|
| 0 | year_2011=1.0 | -0.320403 |
| 1 | workingday_1=1.0 | -0.288260 |
| 2 | year_2012=0.0 | -0.189864 |
| 3 | workingday_0=0.0 | -0.184941 |
| 4 | 1.00 < dayofweek <= 3.00 | -0.167340 |
| 5 | holiday_0=1.0 | -0.129667 |
| 6 | lightsnow.rain=0.0 | 0.104347 |
| 7 | clear=1.0 | 0.071481 |
| 8 | spring=1.0 | 0.067126 |
| 9 | cloudy=0.0 | 0.034713 |
LIME explanation for instance 145 (registered model):
| Feature | Contribution | |
|---|---|---|
| 0 | year_2011=1.0 | -0.560503 |
| 1 | lightsnow.rain=0.0 | 0.454992 |
| 2 | year_2012=0.0 | -0.296389 |
| 3 | workingday_0=0.0 | 0.273425 |
| 4 | winter=0.0 | 0.147800 |
| 5 | clear=1.0 | 0.109626 |
| 6 | workingday_1=1.0 | 0.096463 |
| 7 | fall=0.0 | -0.043647 |
| 8 | 1.00 < dayofweek <= 3.00 | -0.042228 |
| 9 | holiday_1=0.0 | 0.009552 |
Intercept -0.8990137757874299 Prediction_local [-1.47941971] Right: -1.6492668 Intercept -0.4858631930244925 Prediction_local [-2.09719067] Right: -1.9441367 LIME explanation for instance 301 (casual model):
| Feature | Contribution | |
|---|---|---|
| 0 | year_2011=1.0 | -0.338442 |
| 1 | workingday_1=0.0 | 0.279664 |
| 2 | year_2012=0.0 | -0.193622 |
| 3 | workingday_0=1.0 | 0.191643 |
| 4 | 3.00 < dayofweek <= 5.00 | -0.180630 |
| 5 | holiday_0=1.0 | -0.090130 |
| 6 | clear=0.0 | -0.076954 |
| 7 | spring=0.0 | -0.075779 |
| 8 | lightsnow.rain=1.0 | -0.057683 |
| 9 | holiday_1=0.0 | -0.038474 |
LIME explanation for instance 301 (registered model):
| Feature | Contribution | |
|---|---|---|
| 0 | year_2011=1.0 | -0.556889 |
| 1 | lightsnow.rain=1.0 | -0.449927 |
| 2 | year_2012=0.0 | -0.291317 |
| 3 | workingday_0=1.0 | -0.271437 |
| 4 | winter=0.0 | 0.143163 |
| 5 | clear=0.0 | -0.110116 |
| 6 | workingday_1=0.0 | -0.091571 |
| 7 | fall=1.0 | 0.046969 |
| 8 | 3.00 < dayofweek <= 5.00 | -0.043997 |
| 9 | holiday_1=0.0 | 0.013793 |
# SHAP local explanations for the four selected instances (same as LIME above):
# for each instance, a sorted table of SHAP values plus an interactive force plot.
for idx in indices:
    row = X_original.iloc[idx]
    for tag, sv, explainer in (
        ("casual count", shap_values_cas, explainer_cas),
        ("registered", shap_values_reg, explainer_reg),
    ):
        print(f"SHAP force plot for instance {idx} ({tag} model):")
        shap.initjs()
        display(pd.Series(sv[idx], index=NAMES).sort_values(ascending=False))
        display(shap.force_plot(explainer.expected_value, sv[idx], row, feature_names=NAMES))
SHAP force plot for instance 11 (casual count model):
| 0 | |
|---|---|
| hum | 0.031917 |
| clear | 0.027068 |
| lightsnow.rain | 0.003181 |
| fall | 0.000091 |
| holiday_1 | -0.000196 |
| holiday_0 | -0.001233 |
| winter | -0.002748 |
| cloudy | -0.003389 |
| summer | -0.008178 |
| day | -0.010265 |
| spring | -0.017550 |
| year_2012 | -0.021950 |
| workingday_1 | -0.043530 |
| windspeed | -0.061354 |
| dayofweek | -0.068851 |
| workingday_0 | -0.087454 |
| month | -0.092012 |
| year_2011 | -0.100617 |
| temp | -0.313754 |
| atemp | -0.475166 |
SHAP force plot for instance 11 (registered model):
| 0 | |
|---|---|
| hum | 0.079594 |
| workingday_0 | 0.079163 |
| dayofweek | 0.053769 |
| clear | 0.026147 |
| workingday_1 | 0.025656 |
| lightsnow.rain | 0.016438 |
| day | 0.012243 |
| spring | 0.011369 |
| holiday_0 | 0.000627 |
| holiday_1 | 0.000312 |
| cloudy | -0.004005 |
| summer | -0.004103 |
| windspeed | -0.041133 |
| fall | -0.041961 |
| year_2012 | -0.160030 |
| winter | -0.230065 |
| month | -0.270721 |
| atemp | -0.281306 |
| year_2011 | -0.357346 |
| temp | -0.419476 |
SHAP force plot for instance 106 (casual count model):
| 0 | |
|---|---|
| workingday_0 | 0.455705 |
| workingday_1 | 0.206367 |
| hum | 0.196343 |
| month | 0.108391 |
| dayofweek | 0.076772 |
| spring | 0.066041 |
| atemp | 0.062159 |
| clear | 0.042426 |
| day | 0.037109 |
| temp | 0.033074 |
| lightsnow.rain | 0.004520 |
| winter | -0.000281 |
| holiday_1 | -0.000316 |
| fall | -0.000504 |
| holiday_0 | -0.001055 |
| summer | -0.004489 |
| cloudy | -0.004753 |
| year_2012 | -0.062290 |
| windspeed | -0.169491 |
| year_2011 | -0.201528 |
SHAP force plot for instance 106 (registered model):
| 0 | |
|---|---|
| hum | 0.107004 |
| winter | 0.064897 |
| temp | 0.034271 |
| atemp | 0.030809 |
| clear | 0.024176 |
| lightsnow.rain | 0.016961 |
| day | 0.010481 |
| holiday_0 | 0.000641 |
| holiday_1 | 0.000316 |
| summer | -0.003697 |
| cloudy | -0.005982 |
| spring | -0.026842 |
| fall | -0.028991 |
| windspeed | -0.062197 |
| workingday_1 | -0.072369 |
| dayofweek | -0.109188 |
| month | -0.175189 |
| year_2012 | -0.185999 |
| workingday_0 | -0.186220 |
| year_2011 | -0.398411 |
SHAP force plot for instance 145 (casual count model):
| 0 | |
|---|---|
| atemp | 0.201938 |
| temp | 0.119812 |
| hum | 0.047523 |
| spring | 0.032508 |
| month | 0.030590 |
| clear | 0.018241 |
| lightsnow.rain | 0.004391 |
| day | 0.004117 |
| fall | 0.000160 |
| winter | 0.000102 |
| cloudy | -0.000185 |
| holiday_1 | -0.000664 |
| holiday_0 | -0.003262 |
| summer | -0.007866 |
| windspeed | -0.012769 |
| year_2012 | -0.037227 |
| dayofweek | -0.085139 |
| workingday_1 | -0.097859 |
| year_2011 | -0.162108 |
| workingday_0 | -0.215469 |
SHAP force plot for instance 145 (registered model):
| 0 | |
|---|---|
| temp | 0.184062 |
| atemp | 0.171755 |
| dayofweek | 0.098787 |
| workingday_0 | 0.096826 |
| winter | 0.080765 |
| month | 0.045918 |
| workingday_1 | 0.041585 |
| clear | 0.036407 |
| lightsnow.rain | 0.018281 |
| holiday_0 | 0.000641 |
| holiday_1 | 0.000315 |
| cloudy | -0.003505 |
| summer | -0.003564 |
| hum | -0.008209 |
| fall | -0.015369 |
| windspeed | -0.017574 |
| spring | -0.023692 |
| day | -0.026146 |
| year_2012 | -0.180036 |
| year_2011 | -0.401323 |
SHAP force plot for instance 301 (casual count model):
| 0 | |
|---|---|
| workingday_0 | 0.174589 |
| workingday_1 | 0.100857 |
| dayofweek | 0.075948 |
| fall | 0.009313 |
| holiday_1 | -0.000301 |
| month | -0.000857 |
| holiday_0 | -0.000971 |
| summer | -0.003025 |
| day | -0.013174 |
| spring | -0.013510 |
| cloudy | -0.014471 |
| winter | -0.021110 |
| year_2012 | -0.043691 |
| clear | -0.059729 |
| year_2011 | -0.063010 |
| lightsnow.rain | -0.140008 |
| temp | -0.432425 |
| hum | -0.466626 |
| atemp | -0.506161 |
| windspeed | -0.512235 |
SHAP force plot for instance 301 (registered model):
| 0 | |
|---|---|
| fall | 0.093190 |
| winter | 0.065333 |
| month | 0.050148 |
| dayofweek | 0.006796 |
| spring | 0.004626 |
| holiday_0 | 0.000418 |
| holiday_1 | 0.000318 |
| summer | -0.002423 |
| cloudy | -0.010918 |
| clear | -0.042410 |
| workingday_1 | -0.051314 |
| year_2012 | -0.097488 |
| workingday_0 | -0.129571 |
| day | -0.179526 |
| year_2011 | -0.193205 |
| windspeed | -0.213967 |
| atemp | -0.240135 |
| temp | -0.256095 |
| hum | -0.431524 |
| lightsnow.rain | -0.466357 |
# Side-by-side local explanations for instance 106 (casual model):
# the SHAP force plot first, then the LIME tabular explanation.
i106 = 106
shap.initjs()
display(shap.force_plot(explainer_cas.expected_value, shap_values_cas[i106],
                        X_original.iloc[i106], feature_names=NAMES))
exp_cas = lime_explainer.explain_instance(X_original.iloc[i106].values,
                                          casmodel.predict, num_features=10)
exp_cas.show_in_notebook(show_all=False)
display(pd.DataFrame(exp_cas.as_list(), columns=['Feature', 'Contribution']))
Intercept -1.140906751178221 Prediction_local [-1.26480615] Right: -1.3963554
| Feature | Contribution | |
|---|---|---|
| 0 | year_2011=1.0 | -0.331523 |
| 1 | workingday_1=0.0 | 0.283622 |
| 2 | workingday_0=1.0 | 0.189713 |
| 3 | year_2012=0.0 | -0.184492 |
| 4 | dayofweek > 5.00 | -0.158336 |
| 5 | holiday_0=1.0 | -0.095198 |
| 6 | lightsnow.rain=0.0 | 0.089329 |
| 7 | spring=1.0 | 0.075710 |
| 8 | clear=1.0 | 0.058392 |
| 9 | holiday_1=0.0 | -0.051116 |
| pos/neg | Working day [instance 11] (key feature) | Non-working day [instance 106] (key feature) | Working day [instance 145] (key feature) | Non-working day [instance 301] (key feature) | ||
|---|---|---|---|---|---|---|
| Casual Customer | LIME (One Day) | pos | lightsnow.rain=0.0, clear=1.0, winter=1.0 | workingday_1=0.0, workingday_0=1.0, spring=1.0, clear=1.0, cloudy=0.0 | lightsnow.rain=0.0, clear=1.0, spring=1.0, cloudy=0.0 | workingday_1=0.0, workingday_0=1.0 |
| neg | year_2011=1.0, workingday_1=1.0, workingday_0=0.0, year_2012=0.0, dayofweek, spring=0.0, holiday_0=1.0 | year_2011=1.0, year_2012=0.0, dayofweek, holiday_0=1.0 | year_2011=1.0, workingday_1=1.0, workingday_0=0.0, year_2012=0.0, dayofweek, holiday_0=1.0 | year_2011=1.0, year_2012=0.0, 3.00 < dayofweek <= 5.00, holiday_0=1.0, clear=0.0, spring=0.0, lightsnow.rain=1.0, holiday_1=0.0 | ||
| SHAP (One Day) | pos | hum, clear | workingday_0, workingday_1, hum | atemp, temp | workingday_0, workingday_1, dayofweek, fall | |
| neg | spring, day, summer | year_2011, windspeed, year_2012 | workingday_0, year_2011, workingday_1 | atemp, windspeed, hum, temp, lightsnow.rain | ||
| Registered Customer | LIME (One Day) | pos | lightsnow.rain=0.0, workingday_0=0.0, clear=1.0, workingday_1=1.0, holiday_1=0.0 | lightsnow.rain=0.0, winter=0.0, clear=1.0 | lightsnow.rain=0.0, workingday_0=0.0, winter=0.0, clear=1.0, workingday_1=1.0, holiday_1=0.0 | winter=0.0, fall=1.0, holiday_1=0.0 |
| neg | year_2011=1.0, year_2012=0.0, winter=1.0, fall=0.0, dayofweek, holiday_0=1.0 | year_2011=1.0, year_2012=0.0, workingday_0=1.0, workingday_1=0.0, fall=0.0, dayofweek, cloudy=0.0 | year_2011=1.0, year_2012=0.0, fall=0.0, dayofweek, holiday_0=1.0 | year_2011=1.0, lightsnow.rain=1.0, year_2012=0.0, workingday_0=1.0, clear=0.0, workingday_1=0.0, 3.00 < dayofweek <= 5.00 | ||
| SHAP (One Day) | pos | hum, workingday_0, dayofweek, clear | hum, winter, temp, atemp | temp, atemp, dayofweek, workingday_0 | fall, winter, month, dayofweek, spring | |
| neg | temp, year_2011, atemp, month | year_2011, workingday_0, year_2012, month | year_2011, year_2012, day, spring, hum | lightsnow.rain, hum, temp, atemp, windspeed |
| Working day | Non-working day | Working day | Non-working day | ||
|---|---|---|---|---|---|
| instance | 11 | 106 | 145 | 301 | |
| avg. casual | 607 | 1371 | 607 | 1371 | |
| # casual rent on this day | 25 | 1558 | 758 | 57 | |
| avg. registered | 3978 | 2959 | 3978 | 2959 | |
| # registered rent on this day | 1137 | 2186 | 3919 | 570 | |
| Date and day | 12-01-2011 | 17-04-2011 | 26-05-2011 | 29-10-2011 | |
| Casual customer on this day | LIME | -1.65 | -1.4 | -1.7 | -1.65 |
| SHAP | -1.21 | 0.88 | -0.12 | -1.89 | |
| Registered customer on this day | LIME | -1.4 | -1.49 | -1.25 | -1.94 |
| SHAP | -1.48 | -0.94 | 0.13 | -2.06 |
For this working day, both LIME and SHAP predict much lower than average rentals for both casual and registered users. The LIME value for casuals is -1.65 and SHAP is -1.21, indicating strong negative contributions. For registered users, LIME is -1.4 and SHAP is -1.48. Both models highlight that working days and the year 2011 are key negative drivers, while clear weather and no rain have small positive effects. This suggests that both user types rent less on working days, especially in 2011, and that good weather can only slightly mitigate this effect.
On this non-working day, the models diverge for casual users: LIME is -1.4 (still negative), but SHAP is +0.88 (positive), indicating that SHAP sees this as a favorable day for casual rentals. For registered users, both LIME (-1.49) and SHAP (-0.94) are negative, but less so than on working days. Key positive features for casuals are non-working day, spring, and clear weather; negatives are year 2011 and holidays. For registered users, clear weather and winter absence help, but year 2011 and non-working day reduce demand.
For this working day, casual LIME is -1.7 (strong negative), but SHAP is -0.12 (almost neutral), suggesting LIME sees a strong negative effect while SHAP sees little. Registered LIME is -1.25, SHAP is +0.13 (slightly positive). For casuals, clear weather and spring help, but working day and year 2011 are negative. For registered, temperature and working day are positive, but year 2011 and fall absence are negative.
Both models predict very low rentals for both user types: casual LIME -1.65, SHAP -1.89; registered LIME -1.94, SHAP -2.06. The main negative drivers are year 2011, bad weather (rain, not clear), and low temperature. For casuals, being a non-working day is not enough to offset these negatives. For registered users, fall and winter presence help slightly, but weather and year effects dominate.
| Instance | User Type | LIME Key Positives | LIME Key Negatives | SHAP Key Positives | SHAP Key Negatives |
|---|---|---|---|---|---|
| 11 | Casual | lightsnow.rain=0, clear=1, winter=1 | year_2011=1, workingday_1=1, holiday_0=1 | hum, clear | spring, day, summer |
| 11 | Registered | lightsnow.rain=0, workingday_1=1, clear=1 | year_2011=1, winter=1, fall=0 | hum, workingday_0, clear | temp, year_2011, atemp |
| 106 | Casual | workingday_0=1, spring=1, clear=1 | year_2011=1, holiday_0=1 | workingday_0, hum | year_2011, windspeed |
| 106 | Registered | lightsnow.rain=0, winter=0, clear=1 | year_2011=1, workingday_0=1 | hum, winter, temp | year_2011, workingday_0 |
| 145 | Casual | lightsnow.rain=0, clear=1, spring=1 | year_2011=1, workingday_1=1 | atemp, temp | workingday_0, year_2011 |
| 145 | Registered | lightsnow.rain=0, workingday_1=1, clear=1 | year_2011=1, fall=0 | temp, atemp, dayofweek | year_2011, day |
| 301 | Casual | workingday_0=1 | year_2011=1, clear=0, lightsnow.rain=1 | workingday_0, dayofweek, fall | atemp, windspeed, hum |
| 301 | Registered | fall=1, holiday_1=0 | year_2011=1, lightsnow.rain=1, clear=0 | fall, winter, month | lightsnow.rain, hum, temp |
instance 106 (Non-working day, high casual rentals)
For instance 106 (casual model), the LIME explanation is negative (-1.4), while the SHAP value is positive (+0.88). This means LIME suggests the model predicts below the mean, but SHAP indicates the model predicts above the mean. Thus, the explanations do not fully align for this instance.
LIME:
- Key positives: workingday_0=1.0, spring=1.0, clear=1.0
- Key negatives: year_2011=1.0, holiday_0=1.0

SHAP:
- Key positives: workingday_0, hum
- Key negatives: year_2011, windspeed

Plots:
# Instance 106 deep dive: SHAP force plot, SHAP decision plot, then LIME.
shap.initjs()
shap.force_plot(explainer_cas.expected_value, shap_values_cas[106],
                X_original.iloc[106], feature_names=NAMES)

shap.initjs()
# decision_plot needs the feature names as an array rather than an Index.
shap.decision_plot(explainer_cas.expected_value, shap_values_cas[106],
                   X_original.iloc[106], feature_names=NAMES.to_numpy(),
                   show=True)

exp_cas = lime_explainer.explain_instance(X_original.iloc[106].values,
                                          casmodel.predict, num_features=10)
exp_cas.show_in_notebook(show_all=False)
display(pd.DataFrame(exp_cas.as_list(), columns=['Feature', 'Contribution']))
Intercept -1.2132222124281278 Prediction_local [-1.28761096] Right: -1.3963554
| Feature | Contribution | |
|---|---|---|
| 0 | year_2011=1.0 | -0.337784 |
| 1 | workingday_1=0.0 | 0.276531 |
| 2 | workingday_0=1.0 | 0.188758 |
| 3 | year_2012=0.0 | -0.182657 |
| 4 | dayofweek > 5.00 | -0.145702 |
| 5 | lightsnow.rain=0.0 | 0.075583 |
| 6 | clear=1.0 | 0.070492 |
| 7 | spring=1.0 | 0.059142 |
| 8 | winter=0.0 | -0.041087 |
| 9 | holiday_0=1.0 | -0.037664 |
Yes.
Negative features like year_2011 and windspeed (SHAP), and year_2011=1.0, holiday_0=1.0 (LIME) push the prediction down, but are outweighed by strong positive contributions from workingday_0 (non-working day), spring, and clear weather. These negative features reduce the predicted rental count, but not enough to offset the strong positive drivers, so the final prediction remains above average.
For instance 106, the model predicts a higher-than-average casual rental count, mainly due to it being a non-working day in spring with clear weather. Negative contributions from the year and holiday status are present but not strong enough to reverse the prediction. LIME and SHAP highlight similar key features, but LIME underestimates the positive effect, while SHAP aligns with the actual model output.
Yes, there is a clear example of this: instance 106 (a non-working day with high casual rentals).
Why does this happen?
LIME works by fitting a simple linear model around the selected instance, using randomly perturbed samples. If the model’s decision boundary is highly non-linear or if the local neighborhood isn’t well represented by a linear fit, LIME can misattribute which features are important or even get the direction of the prediction wrong.
SHAP, on the other hand, calculates exact Shapley values for tree-based models like XGBoost. It takes into account all possible combinations of features and captures complex interactions and non-linear effects, so it provides a more faithful breakdown of the model’s actual prediction.
What does this tell us?
Summary:
When you see LIME and SHAP diverge, it’s usually because the model’s decision surface is complex and not well-approximated by a linear model. For tree ensembles like XGBoost, SHAP gives a more accurate and robust explanation, while LIME may be misleading. This highlights the importance of choosing the right explanation tool for your model.
# SHAP Plots & LIME plots (force plot, decision plot)
# Do the local explanations from LIME & SHAP diverge?
# SHAP global
# Beeswarm summary of SHAP values over all rows for the casual-users model;
# show=False so the title can be set before the figure is rendered.
shap.summary_plot(shap_values_cas, X_original, feature_names=NAMES, show=False)
plt.title('SHAP Summary Plot - Casual Users Model', fontsize=16, fontweight='bold')
/tmp/ipython-input-3137738493.py:3: FutureWarning: The NumPy global RNG was seeded by calling `np.random.seed`. In a future version this function will no longer use the global RNG. Pass `rng` explicitly to opt-in to the new behaviour and silence this warning. shap.summary_plot(shap_values_cas, X_original, feature_names=NAMES, show=False)
Text(0.5, 1.0, 'SHAP Summary Plot - Casual Users Model')
# SHAP & LIME local plots
# Local (single-instance) explanations for row 344; the row is resolved to
# its calendar date in the lookup cell below.
i = 344
#SHAP decision plot (local)
# feature_display_range=slice(None, -11, -1) restricts the plot to the top 10
# features; xlim fixes the axis so plots of different instances are comparable.
shap.decision_plot(explainer_cas.expected_value, shap_values_cas[i], X_original.iloc[i],
feature_display_range=slice(None, -11, -1), feature_names=NAMES.to_numpy(),
xlim=(-1.0, 1.0), show=False)
plt.title('SHAP Decision Plot - Casual Users Model - Instance 344', fontsize=16, fontweight='bold')
plt.show()
shap.initjs()
#SHAP force plot (local)
force_plot = shap.plots.force(explainer_cas.expected_value, shap_values_cas[i], X_original.iloc[i])
# Save as standalone HTML
# (force plots are JS widgets; exporting to HTML keeps them interactive
# outside the notebook)
shap.save_html("shap_force_plot.html", force_plot)
# Then download and open the file manually
# NOTE: google.colab is only importable inside a Colab runtime.
from google.colab import files
files.download("shap_force_plot.html")
# LIME
lime_exp = lime_explainer.explain_instance(X_original.iloc[i].values, casmodel.predict, num_features=10)
lime_exp.show_in_notebook(show_all=False)
# Last expression of the cell: render the LIME contributions as a table.
pd.DataFrame(lime_exp.as_list(),columns=['Feature','Contribution'])
Intercept -1.1549052069034944 Prediction_local [-1.35629817] Right: -1.4531881
| Feature | Contribution | |
|---|---|---|
| 0 | year_2011=1.0 | -0.323329 |
| 1 | workingday_1=0.0 | 0.278374 |
| 2 | year_2012=0.0 | -0.194914 |
| 3 | workingday_0=1.0 | 0.189416 |
| 4 | dayofweek > 5.00 | -0.159399 |
| 5 | lightsnow.rain=0.0 | 0.117765 |
| 6 | clear=1.0 | 0.061308 |
| 7 | holiday_0=1.0 | -0.060817 |
| 8 | spring=0.0 | -0.058787 |
| 9 | winter=0.0 | -0.051011 |
# Resolve row number 344 back to its calendar date and show its key columns.
date = df.index[df['row_number'].eq(344)]
print(date)

instance_344 = df.loc[date, cols]
print("\nInstance 344:")
print(instance_344)
DatetimeIndex(['2011-12-11'], dtype='datetime64[ns]', name='dteday', freq=None)
Instance 344:
workingday casual registered row_number
dteday
2011-12-11 0 377 2366 344